
from __future__ import with_statement
import re
import os, os.path
import datetime
import csv

def filepaths(top_path):
    """Walk top_path recursively, yielding (file name, full path) pairs.

    Only files are yielded; the directories visited along the way are not.
    """
    for folder, _subfolders, filenames in os.walk(top_path):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            yield filename, full_path

# Directory configuration.
#   oldpath  - tree that is walked for the raw input briefs
#   newpath  - directory that receives the cleaned briefs
#   infopath - directory that receives filekeys.txt
#
# Alternative locations, kept for reference:
#oldpath = 'C:\Documents and Settings\John Sheffield\Desktop\Simmons RA\EuroBriefs'
#newpath = 'C:\Documents and Settings\John Sheffield\Desktop\Simmons RA\EuroFinal'
#infopath = 'C:\Documents and Settings\John Sheffield\Desktop\Simmons RA'
#
# Raw strings keep every backslash literal.  The values are unchanged, but the
# literals no longer depend on unrecognised escapes such as '\R' or '\D' being
# passed through, and '\t' in '\text analysis' can never be read as a tab.
oldpath = r'C:\Documents and Settings\Rich\Desktop\Rewards for Ratification\text analysis\EuroBriefs14jul2010'
newpath = r'C:\Documents and Settings\Rich\Desktop\Rewards for Ratification\text analysis\EuroFinal14jul2010'
infopath = r'C:\Documents and Settings\Rich\Desktop\Rewards for Ratification\text analysis'

# Number of documents read so far; drives the progress message.
count = 0

def _extract_date(brief):
    """Return the raw text that follows the first 'DATE = ' marker in brief.

    The returned value is everything after the marker up to the end of that
    line, unmodified.  Before returning, the value is validated: with all
    whitespace removed, characters 0-1 must be the day, 3-4 the month and
    6 onwards the year of a real calendar date.

    Raises:
        IndexError: brief contains no 'DATE = ' marker.
        ValueError: a date field is not an integer, or the date is invalid.
        OverflowError: a numeric field is too large for datetime.date.
    """
    thisdate = brief.split('DATE = ', 1)[1].split('\n', 1)[0]
    textdate = re.sub(r'\s', '', thisdate)
    # Built purely as a validity check; the object itself is not needed.
    datetime.date(int(textdate[6:]), int(textdate[3:5]), int(textdate[0:2]))
    return thisdate


def _strip_junk(brief, mydate):
    """Return brief with line breaks, tabs and the date header removed.

    Removes, in order: every newline followed by '>', every remaining
    newline, every tab, and finally every occurrence of 'DATE = ' + mydate.
    The header is matched as literal text, so characters in mydate that are
    special in regular expressions cannot mis-match or raise re.error.
    """
    flattened = brief.replace('\n>', '').replace('\n', '').replace('\t', '')
    return flattened.replace("DATE = " + mydate, '')


# One "filename,date\n" entry per processed brief, in processing order.
fkeys = []
for name, path in filepaths(oldpath):
    with open(path) as f:
        brief = f.read()

    # Increment count and report progress every 500 documents.  A single
    # string argument prints the same text under the print statement and
    # the print function.
    count = count + 1
    if count % 500 == 0:
        print("Starting document  " + str(count))

    # Pull out the date; a missing or malformed date is reported and the
    # brief is still processed, tagged with a placeholder.
    try:
        mydate = _extract_date(brief)
    except (IndexError, ValueError, OverflowError):
        mydate = 'SCREWUP-SCREWUP-SCREWUP'
        print(path + " has a problem with dates")

    # Creates and appends the file key to match filename and briefdate
    fkeys.append(name + "," + mydate + '\n')

    # Getting rid of junk strings
    finalbrief = _strip_junk(brief, mydate)

    # Printing final brief with date in header.
    # NOTE(review): the output path uses only the bare file name, so two
    # input files with the same name in different subdirectories write to
    # the same output file.
    with open(os.path.join(newpath, name), 'w') as newfile:
        newfile.write("DATE = " + mydate + "\n")
        newfile.write(finalbrief)

# Output all file keys: each entry in fkeys is already a complete,
# newline-terminated line, so they are written as-is.  The with-block closes
# the file even if a write fails.
with open(os.path.join(infopath, 'filekeys.txt'), 'w') as filekeys:
    filekeys.writelines(fkeys)
